<!DOCTYPE html>
<html lang="zh-Hans">
<head>
  <meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=2">
<meta name="theme-color" content="#222">
<meta name="generator" content="Hexo 5.3.0">
  <link rel="apple-touch-icon" sizes="180x180" href="/blog/images/apple-touch-icon-next.png">
  <link rel="icon" type="image/png" sizes="32x32" href="/blog/images/favicon-32x32-next.png">
  <link rel="icon" type="image/png" sizes="16x16" href="/blog/images/favicon-16x16-next.png">
  <link rel="mask-icon" href="/blog/images/logo.svg" color="#222">

<link rel="stylesheet" href="/blog/css/main.css">

<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Lato:300,300italic,400,400italic,700,700italic&amp;display=swap&amp;subset=latin,latin-ext">
<link rel="stylesheet" href="/blog/lib/font-awesome/css/font-awesome.min.css">


<script id="hexo-configurations">
  // Reuse an existing NexT namespace object if one is already on window;
  // otherwise start from an empty object.
  var NexT = window.NexT || {};

  // Site-wide settings, declared with `var` at the top level of a classic
  // script so the binding is also reachable as window.CONFIG.
  var CONFIG = {
    // Only the host part of the site URL is kept.
    hostname: new URL('https://guodh.gitee.io/blog').hostname,
    root: '/blog/',
    scheme: 'Pisces',
    version: '7.7.1',
    exturl: false,
    sidebar: {
      position: 'left',
      display: 'post',
      padding: 18,
      offset: 12,
      onmobile: false
    },
    copycode: {
      enable: true,
      show_result: true,
      style: null
    },
    back2top: {
      enable: true,
      sidebar: false,
      scrollpercent: false
    },
    bookmark: {
      enable: false,
      color: '#222',
      save: 'auto'
    },
    fancybox: false,
    mediumzoom: false,
    lazyload: false,
    pangu: false,
    comments: {
      style: 'tabs',
      active: null,
      storage: true,
      lazyload: false,
      nav: null
    },
    algolia: {
      appID: '',
      apiKey: '',
      indexName: '',
      hits: { per_page: 10 },
      // These are ordinary strings, not template literals: the ${...} markers
      // are stored verbatim as placeholder text.
      labels: {
        input_placeholder: 'Search for Posts',
        hits_empty: "We didn't find any results for the search: ${query}",
        hits_stats: '${hits} results found in ${time} ms'
      }
    },
    localsearch: {
      enable: false,
      trigger: 'auto',
      top_n_per_article: 1,
      unescape: false,
      preload: false
    },
    path: 'search.xml',
    motion: {
      enable: true,
      async: false,
      transition: {
        post_block: 'fadeIn',
        post_header: 'slideDownIn',
        post_body: 'slideDownIn',
        coll_header: 'slideLeftIn',
        sidebar: 'slideUpIn'
      }
    }
  };
</script>

  <meta name="description" content="最近开始负责其对接Solr的业务，发现除了Solr的语句以及增量全量的方式，还需要很多细节点需要自己去关注。因为搜索组方面除了保证提供稳定性，还需要针对不同的业务场景提供不同的解决方案，因此了解Solr能够解决些什么问题，还是非常重要的。">
<meta property="og:type" content="article">
<meta property="og:title" content="Solr AND SolrCloud 学习记录">
<meta property="og:url" content="https://guodh.gitee.io/blog/2018/05/03/2018/Solr%E5%AD%A6%E4%B9%A0%E6%80%BB%E7%BB%93/index.html">
<meta property="og:site_name" content="蛋蛋的小屋">
<meta property="og:description" content="最近开始负责其对接Solr的业务，发现除了Solr的语句以及增量全量的方式，还需要很多细节点需要自己去关注。因为搜索组方面除了保证提供稳定性，还需要针对不同的业务场景提供不同的解决方案，因此了解Solr能够解决些什么问题，还是非常重要的。">
<meta property="og:locale" content="zh_CN">
<meta property="og:image" content="http://dl2.iteye.com/upload/attachment/0110/8781/f79e81a8-ae9c-381b-9d26-adb80f3d16fc.png">
<meta property="og:image" content="http://dl2.iteye.com/upload/attachment/0110/8787/95641581-1018-3963-9bbf-eb85033106e7.jpg">
<meta property="og:image" content="https://olwr1lamu.qnssl.com/%E6%8E%A8%E8%8D%90%E7%AE%97%E6%B3%95.png">
<meta property="article:published_time" content="2018-05-02T16:00:01.000Z">
<meta property="article:modified_time" content="2018-05-02T15:21:53.000Z">
<meta property="article:author" content="RunningEgg">
<meta property="article:tag" content="搜索">
<meta name="twitter:card" content="summary">
<meta name="twitter:image" content="http://dl2.iteye.com/upload/attachment/0110/8781/f79e81a8-ae9c-381b-9d26-adb80f3d16fc.png">

<link rel="canonical" href="https://guodh.gitee.io/blog/2018/05/03/2018/Solr%E5%AD%A6%E4%B9%A0%E6%80%BB%E7%BB%93/">


<script id="page-configurations">
  // Per-page flags attached to the global CONFIG object.
  // Variable reference: https://hexo.io/docs/variables.html
  CONFIG.page = { sidebar: "", isHome: false, isPost: true };
</script>

  <title>Solr AND SolrCloud 学习记录 | 蛋蛋的小屋</title>
  






  <noscript>
  <style>
  .use-motion .brand,
  .use-motion .menu-item,
  .sidebar-inner,
  .use-motion .post-block,
  .use-motion .pagination,
  .use-motion .comments,
  .use-motion .post-header,
  .use-motion .post-body,
  .use-motion .collection-header { opacity: initial; }

  .use-motion .site-title,
  .use-motion .site-subtitle {
    opacity: initial;
    top: initial;
  }

  .use-motion .logo-line-before i { left: initial; }
  .use-motion .logo-line-after i { right: initial; }
  </style>
</noscript>

</head>

<body itemscope itemtype="http://schema.org/WebPage">
  <div class="container use-motion">
    <div class="headband"></div>

    <header class="header" itemscope itemtype="http://schema.org/WPHeader">
      <div class="header-inner"><div class="site-brand-container">
  <div class="site-meta">

    <div>
      <a href="/blog/" class="brand" rel="start">
        <span class="logo-line-before"><i></i></span>
        <span class="site-title">蛋蛋的小屋</span>
        <span class="logo-line-after"><i></i></span>
      </a>
    </div>
        <p class="site-subtitle">来了就看看吧，没有刀剑只有故事</p>
  </div>

  <div class="site-nav-toggle">
    <div class="toggle" aria-label="Toggle navigation bar">
      <span class="toggle-line toggle-line-first"></span>
      <span class="toggle-line toggle-line-middle"></span>
      <span class="toggle-line toggle-line-last"></span>
    </div>
  </div>
</div>


<nav class="site-nav">
  
  <ul id="menu" class="menu">
        <li class="menu-item menu-item-home">

    <a href="/blog/" rel="section"><i class="fa fa-fw fa-home"></i>Home</a>

  </li>
        <li class="menu-item menu-item-about">

    <a href="/blog/about/" rel="section"><i class="fa fa-fw fa-user"></i>About</a>

  </li>
        <li class="menu-item menu-item-archives">

    <a href="/blog/archives/" rel="section"><i class="fa fa-fw fa-archive"></i>Archives</a>

  </li>
        <li class="menu-item menu-item-book">

    <a href="/blog/book/" rel="section"><i class="fa fa-fw fa-archive"></i>book</a>

  </li>
  </ul>

</nav>
</div>
    </header>

    
  <div class="back-to-top">
    <i class="fa fa-arrow-up"></i>
    <span>0%</span>
  </div>


    <main class="main">
      <div class="main-inner">
        <div class="content-wrap">
          

          <div class="content">
            

  <div class="posts-expand">
      
  
  
  <article itemscope itemtype="http://schema.org/Article" class="post-block " lang="zh-Hans">
    <link itemprop="mainEntityOfPage" href="https://guodh.gitee.io/blog/2018/05/03/2018/Solr%E5%AD%A6%E4%B9%A0%E6%80%BB%E7%BB%93/">

    <span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
      <meta itemprop="image" content="/blog/images/avatar.gif">
      <meta itemprop="name" content="RunningEgg">
      <meta itemprop="description" content="写下我的点点滴滴，写下我对这个世界的看法，还有我内心所崇拜的技术">
    </span>

    <span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
      <meta itemprop="name" content="蛋蛋的小屋">
    </span>
      <header class="post-header">
        <h1 class="post-title" itemprop="name headline">
          Solr AND SolrCloud 学习记录
        </h1>

        <div class="post-meta">
            <span class="post-meta-item">
              <span class="post-meta-item-icon">
                <i class="fa fa-calendar-o"></i>
              </span>
              <span class="post-meta-item-text">Posted on</span>

              <time title="Created: 2018-05-03 00:00:01" itemprop="dateCreated datePublished" datetime="2018-05-03T00:00:01+08:00">2018-05-03</time>
            </span>
              <span class="post-meta-item">
                <span class="post-meta-item-icon">
                  <i class="fa fa-calendar-check-o"></i>
                </span>
                <span class="post-meta-item-text">Edited on</span>
                <time title="Modified: 2018-05-02 23:21:53" itemprop="dateModified" datetime="2018-05-02T23:21:53+08:00">2018-05-02</time>
              </span>

          

        </div>
      </header>

    
    
    
    <div class="post-body" itemprop="articleBody">

      
        <p>最近开始负责其对接Solr的业务，发现除了Solr的语句以及增量全量的方式，还需要很多细节点需要自己去关注。因为搜索组方面除了保证提供稳定性，还需要针对不同的业务场景提供不同的解决方案，因此了解Solr能够解决些什么问题，还是非常重要的。</p>
<a id="more"></a> 

<h2 id="SolrCloud"><a href="#SolrCloud" class="headerlink" title="SolrCloud"></a>SolrCloud</h2><p>除了Solr的简单查询语句，对于使用者所必须理解的就是SolrCloud的整体架构。首先需要区分两个概念：</p>
<ul>
<li>shard</li>
<li>replica</li>
</ul>
<p>先从名词定义上来理解，shard指的是分片，replica指的是复制品。那么比较好理解了，当索引增长到一定的量，单台机器已经不能承载那么多的索引，必须将索引拆成多份，这时你就需要好多的shard了。<strong>shard</strong>是将索引切分成多份索引，来实现SolrCloud的可扩展性。</p>
<p>而都知道分布式系统最需要的CAP理论，对于C也是必须的。所以，每一份shard都必须要有多份<strong>replica</strong>，这样才能够保证即使有一台服务器挂了，还是能够对外提供服务。<br>对于有多份replica，如果要提升处理能力，肯定就需要选举出一个<strong>Leader</strong>来处理逻辑。所以，在replica中就有了Leader的概念。</p>
<img src="http://dl2.iteye.com/upload/attachment/0110/8781/f79e81a8-ae9c-381b-9d26-adb80f3d16fc.png" alt="solr shard结构"/>

<h3 id="增加索引的过程"><a href="#增加索引的过程" class="headerlink" title="增加索引的过程"></a>增加索引的过程</h3><ul>
<li>用户提交Doc到任意replica</li>
<li>replica会将请求转发到当前的Leader</li>
<li>Leader将更新结束的Doc分发到各个replica</li>
</ul>
<h3 id="查询过程"><a href="#查询过程" class="headerlink" title="查询过程"></a>查询过程</h3><p>与增加索引相对应的是查询的过程：</p>
<ul>
<li>用户将查询请求发送到任意replica</li>
<li>replica会将请求分解成多个子查询，分配到不同的shard的replica上(注意，这里是不需要经过Leader，每个节点都保存了所有节点信息)</li>
<li>replica整合所有得到的结果，返回用户</li>
</ul>
<img src="http://dl2.iteye.com/upload/attachment/0110/8787/95641581-1018-3963-9bbf-eb85033106e7.jpg" alt="增加索引的过程"/>

<h3 id="tLog"><a href="#tLog" class="headerlink" title="tLog"></a>tLog</h3><p>这里还需要介绍下tlog。在Lucene是没有soft Commit这个概念的，只有Hard Commit。而Solr Cloud为了保证，写入的索引能立即可见，提出了soft Commit。将索引先写到memory中，搜索的时候是memory 和 disk合并的结果。是不是和LSM Tree有点类似 ~<br>tlog就是为了软提交所存在，当索引还没写入到硬盘中，这时断电了索引不就丢了。所以，每一次soft Commit都是先写入tlog，再写入到memory中。当然tlog还有其他大用，在我司的业务中，每天晚上的凌晨会对solr进行全量，来对索引的merge进行优化。这个时候就存在，增量丢失，而通过再次消费tlog中的日志，就能保证增量的数据不丢失。</p>
<h3 id="故障恢复过程-AND-新replica"><a href="#故障恢复过程-AND-新replica" class="headerlink" title="故障恢复过程 AND 新replica"></a>故障恢复过程 AND 新replica</h3><p>SolrCloud提供了两种恢复方式:</p>
<ul>
<li>对等同步</li>
<li>快照复制</li>
</ul>
<p>前者用于最近的部分丢失，而后者是由于处于脱机导致不能同步，需要将所有信息同步。而且更强大的是，无论哪种情况SolrCloud都会自动为你选择。<br>因此，类似的场景往往发生在需要新增一个replica来降低每台服务器的负载（使用默认的文档策略不能使不同的shard拥有不同数量的replica，但是自定义模式下可以）。这时只需要启动一个Solr实例，并将shardId赋予给它。在启动参数内添加 -Dshard=Id，那么就能够创建一个新的副本。</p>
<h3 id="由于Solr涉及的内容较多，容笔者慢慢记录下学习的过程"><a href="#由于Solr涉及的内容较多，容笔者慢慢记录下学习的过程" class="headerlink" title="由于Solr涉及的内容较多，容笔者慢慢记录下学习的过程"></a>由于Solr涉及的内容较多，容笔者慢慢记录下学习的过程</h3><p>内容包括：</p>
<ul>
<li>mmseg分词算法</li>
<li>搜索推荐、协同过滤等</li>
</ul>
<img src="https://olwr1lamu.qnssl.com/%E6%8E%A8%E8%8D%90%E7%AE%97%E6%B3%95.png" alt="推荐算法发展"/>


<h2 id="参考资料"><a href="#参考资料" class="headerlink" title="参考资料"></a>参考资料</h2><p>SolrCloud之分布式索引及与Zookeeper的集成：<a href="http://josh-persistence.iteye.com/blog/2234411">http://josh-persistence.iteye.com/blog/2234411</a><br>Solr实战（实体书）</p>

    </div>

    
    
    

      <footer class="post-footer">
          <div class="post-tags">
              <a href="/blog/tags/%E6%90%9C%E7%B4%A2/" rel="tag"># 搜索</a>
          </div>

        


        
    <div class="post-nav">
      <div class="post-nav-item">
    <a href="/blog/2018/04/21/2018/2018-04-19/" rel="prev" title="数据库MVCC、事务回滚">
      <i class="fa fa-chevron-left"></i> 数据库MVCC、事务回滚
    </a></div>
      <div class="post-nav-item">
    <a href="/blog/2018/06/16/2018/%E9%9D%A2%E8%AF%95%E5%B0%8F%E7%BB%93/" rel="next" title="面试小结">
      面试小结 <i class="fa fa-chevron-right"></i>
    </a></div>
    </div>
      </footer>
    
  </article>
  
  
  

  </div>


          </div>
          

<script>
  // Re-selects a comment tab when the 'tabs:register' event fires and, when
  // CONFIG.comments.storage is truthy, remembers the pane reported by
  // 'tabs:click' events under the localStorage key 'comments_active'.
  //
  // Wrapped in an IIFE so the helper names below do not become globals.
  (() => {
    const STORAGE_KEY = 'comments_active';

    // Accessing localStorage can throw (for example when the browser blocks
    // storage for the page). Treat that the same as "nothing stored" so the
    // configured default can still be applied.
    const readStoredClass = () => {
      try {
        return localStorage.getItem(STORAGE_KEY);
      } catch (err) {
        return null;
      }
    };

    // Best-effort write: if storage is unavailable or full, the preference is
    // simply not remembered.
    const writeStoredClass = value => {
      try {
        localStorage.setItem(STORAGE_KEY, value);
      } catch (err) {
        // Intentionally ignored; persistence is optional.
      }
    };

    window.addEventListener('tabs:register', () => {
      // NOTE(review): the CONFIG.comments literal on this page defines `active`
      // but no `activeClass`, so this starts out undefined here; presumably it
      // is only emitted when a default tab is configured. Confirm.
      let activeClass = CONFIG.comments.activeClass;
      if (CONFIG.comments.storage) {
        // A stored choice wins over the configured one.
        activeClass = readStoredClass() || activeClass;
      }
      if (!activeClass) return;

      const activeTab = document.querySelector(`a[href="#comment-${activeClass}"]`);
      if (activeTab) {
        activeTab.click();
      }
    });

    if (CONFIG.comments.storage) {
      window.addEventListener('tabs:click', event => {
        // Only comment tab panes are of interest.
        if (!event.target.matches('.tabs-comment .tab-content .tab-pane')) return;

        // The pane's second class is what gets stored and later matched
        // against "#comment-<class>" links.
        const commentClass = event.target.classList[1];
        // Without a second class there is nothing meaningful to store;
        // writing it would persist the string "undefined".
        if (!commentClass) return;

        writeStoredClass(commentClass);
      });
    }
  })();
</script>

        </div>
          
  
  <div class="toggle sidebar-toggle">
    <span class="toggle-line toggle-line-first"></span>
    <span class="toggle-line toggle-line-middle"></span>
    <span class="toggle-line toggle-line-last"></span>
  </div>

  <aside class="sidebar">
    <div class="sidebar-inner">

      <ul class="sidebar-nav motion-element">
        <li class="sidebar-nav-toc">
          Table of Contents
        </li>
        <li class="sidebar-nav-overview">
          Overview
        </li>
      </ul>

      <!--noindex-->
      <div class="post-toc-wrap sidebar-panel">
          <div class="post-toc motion-element"><ol class="nav"><li class="nav-item nav-level-2"><a class="nav-link" href="#SolrCloud"><span class="nav-number">1.</span> <span class="nav-text">SolrCloud</span></a><ol class="nav-child"><li class="nav-item nav-level-3"><a class="nav-link" href="#%E5%A2%9E%E5%8A%A0%E7%B4%A2%E5%BC%95%E7%9A%84%E8%BF%87%E7%A8%8B"><span class="nav-number">1.1.</span> <span class="nav-text">增加索引的过程</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#%E6%9F%A5%E8%AF%A2%E8%BF%87%E7%A8%8B"><span class="nav-number">1.2.</span> <span class="nav-text">查询过程</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#tLog"><span class="nav-number">1.3.</span> <span class="nav-text">tLog</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#%E6%95%85%E9%9A%9C%E6%81%A2%E5%A4%8D%E8%BF%87%E7%A8%8B-AND-%E6%96%B0replica"><span class="nav-number">1.4.</span> <span class="nav-text">故障恢复过程 AND 新replica</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#%E7%94%B1%E4%BA%8ESolr%E6%B6%89%E5%8F%8A%E7%9A%84%E5%86%85%E5%AE%B9%E8%BE%83%E5%A4%9A%EF%BC%8C%E5%AE%B9%E7%AC%94%E8%80%85%E6%85%A2%E6%85%A2%E8%AE%B0%E5%BD%95%E4%B8%8B%E5%AD%A6%E4%B9%A0%E7%9A%84%E8%BF%87%E7%A8%8B"><span class="nav-number">1.5.</span> <span class="nav-text">由于Solr涉及的内容较多，容笔者慢慢记录下学习的过程</span></a></li></ol></li><li class="nav-item nav-level-2"><a class="nav-link" href="#%E5%8F%82%E8%80%83%E8%B5%84%E6%96%99"><span class="nav-number">2.</span> <span class="nav-text">参考资料</span></a></li></ol></div>
      </div>
      <!--/noindex-->

      <div class="site-overview-wrap sidebar-panel">
        <div class="site-author motion-element" itemprop="author" itemscope itemtype="http://schema.org/Person">
  <p class="site-author-name" itemprop="name">RunningEgg</p>
  <div class="site-description" itemprop="description">写下我的点点滴滴，写下我对这个世界的看法，还有我内心所崇拜的技术</div>
</div>
<div class="site-state-wrap motion-element">
  <nav class="site-state">
      <div class="site-state-item site-state-posts">
          <a href="/blog/archives/">
        
          <span class="site-state-item-count">89</span>
          <span class="site-state-item-name">posts</span>
        </a>
      </div>
      <div class="site-state-item site-state-tags">
        <span class="site-state-item-count">24</span>
        <span class="site-state-item-name">tags</span>
      </div>
  </nav>
</div>
  <div class="links-of-author motion-element">
      <span class="links-of-author-item">
        <a href="mailto:guodh_1@163.com" title="E-Mail → mailto:guodh_1@163.com" rel="noopener" target="_blank"><i class="fa fa-fw fa-envelope"></i>E-Mail</a>
      </span>
  </div>



      </div>

    </div>
  </aside>
  <div id="sidebar-dimmer"></div>


      </div>
    </main>

    <footer class="footer">
      <div class="footer-inner">
        

<div class="copyright">
  
  &copy; 
  <span itemprop="copyrightYear">2022</span>
  <span class="with-love">
    <i class="fa fa-user"></i>
  </span>
  <span class="author" itemprop="copyrightHolder">RunningEgg</span>
</div>
  <div class="powered-by">Powered by <a href="https://hexo.io/" class="theme-link" rel="noopener" target="_blank">Hexo</a> v5.3.0
  </div>
  <span class="post-meta-divider">|</span>
  <div class="theme-info">Theme – <a href="https://pisces.theme-next.org/" class="theme-link" rel="noopener" target="_blank">NexT.Pisces</a> v7.7.1
  </div>

        








      </div>
    </footer>
  </div>

  
  <script src="/blog/lib/anime.min.js"></script>
  <script src="/blog/lib/velocity/velocity.min.js"></script>
  <script src="/blog/lib/velocity/velocity.ui.min.js"></script>

<script src="/blog/js/utils.js"></script>

<script src="/blog/js/motion.js"></script>


<script src="/blog/js/schemes/pisces.js"></script>


<script src="/blog/js/next-boot.js"></script>




  















  

  

</body>
</html>
